# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class JobSpider(CrawlSpider):
    """Crawl the paginated listing pages of blog.dbsgw.cn.

    Starting from the blog home page, the spider follows every link whose
    URL matches the ``/page/<number>`` pagination pattern and emits one item
    per matched page containing that page's URL.
    """

    name = 'job'
    allowed_domains = ['dbsgw.cn']
    start_urls = ['https://blog.dbsgw.cn/']

    rules = (
        # Link extractor: only pagination URLs are accepted. The dots in the
        # host name are escaped so they match a literal '.' rather than any
        # character; follow=True keeps extracting links from matched pages.
        Rule(
            LinkExtractor(allow=r'https://blog\.dbsgw\.cn/page/\d+$'),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build the item for one page matched by the pagination rule.

        Args:
            response: The response for the matched page.

        Returns:
            dict: A single item of the form ``{'url': <page URL>}``.
        """
        # Use the spider's logger instead of print() so the output respects
        # Scrapy's configured log level and handlers.
        self.logger.info('Visited %s', response.url)
        return {'url': response.url}
